## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/arg_ref.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/corpus
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/parameters.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/metaprogramming.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/functions.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/argument_type.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/direct_effects.fst
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/indirect_effects.fst
## Corpus size:  500
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/client
## Client size:  2000

1 Runnable Code

##  chr [1:76921] "example/abind/abind.R" "example/abind/acorn.R" ...
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/sloc-corpus.fst
## [1] "testthat" "vignette" "test"     "example"
## [1] "\\CorpusExampleCount"  "\\CorpusTestCount"     "\\CorpusVignetteCount"
## [4] "\\ClientExampleCount"  "\\ClientTestCount"     "\\ClientVignetteCount"
## [1] "\\CorpusExampleCode"  "\\CorpusTestCode"     "\\CorpusVignetteCode"
## [4] "\\ClientExampleCode"  "\\ClientTestCode"     "\\ClientVignetteCode"

2 Package Code

## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/sloc-package.fst
## [1] "\\ClientNativeCode" "\\ClientRCode"      "\\CorpusNativeCode"
## [4] "\\CorpusRCode"

3 General Corpus Info

## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/package-dependency.fst
## 'data.frame':    64135 obs. of  2 variables:
##  $ package: chr  "xtable" "pbapply" "DT" "networkD3" ...
##  $ client : chr  "A3" "A3" "aaSEA" "aaSEA" ...
## [1] "\\CorpusDependencyCount"
## [1] "\\CorpusMaximumDependencyPackage" "\\CorpusMaximumDependencyCount"
## [1] "\\CorpusMinimumDependencyPackage" "\\CorpusMinimumDependencyCount"

4 Package Functions

## [1] "\\CorpusMaximumFunctionPackage" "\\CorpusMaximumFunctionCount"
## [1] "\\CorpusTwentyFivePackageCount"  "\\CorpusFiveHundredPackageCount"
## [1] "\\TotalFunctionCount"
## 1 - 25&161\ 26 - 50&89\ 51 - 75&49\ 76 - 100&32\ 101 - 125&29\ 126 - 150&19\ 151 - 175&24\ 176 - 200&13\ 201 - 225&15\ 226 - 250&10\ 251 - 275&8\ 276 - 300&9\ 301 - 325&11\ 326 - 350&1\ 351 - 375&4\ 376 - 400&3\ 401 - 425&3\ 426 - 450&1\ 451 - 475&2\ 476 - 500&1\ 501 - 525&3\ 526 - 550&2\ 626 - 650&1\ 701 - 725&1\ 726 - 750&1\ 751 - 775&2\ 776 - 800&1\ 851 - 875&1\ 876 - 900&1\

5 Calls

## [1] "\\TotalCallCount"        "\\CalledOneFunctionPerc"
## [3] "\\CalledTenFunctionPerc"

## 'data.frame':    203741 obs. of  47 variables:
##  $ qual_name   : chr  "abind*$#$*abind" "abind*$#$*abind" "abind*$#$*abind" "abind*$#$*abind" ...
##  $ anonymous   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ formal_pos  : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ arg_name    : chr  "..." "along" "rev.along" "new.names" ...
##  $ vararg      : logi  TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ missing     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ call_count  : int  591 591 591 591 591 591 591 591 591 591 ...
##  $ escaped     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ force_tot   : int  0 577 581 580 581 560 557 374 591 580 ...
##  $ force_cap   : int  0 577 581 580 581 560 557 374 591 580 ...
##  $ force_esc   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ force_con   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lookup_tot  : int  0 39 0 0 0 557 0 0 0 0 ...
##  $ lookup_cap  : int  0 577 4 580 0 8 0 95 591 580 ...
##  $ lookup_esc  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lookup_con  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ meta_tot    : int  0 266 0 0 0 0 0 0 0 0 ...
##  $ meta_cap    : int  0 266 0 0 0 0 0 0 0 0 ...
##  $ meta_esc    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ assign_self : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ assign_tot  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ define_self : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ define_tot  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ remove_self : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ remove_tot  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ error_self  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ error_tot   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ lookup_self : int  0 39 0 0 0 557 0 0 0 0 ...
##  $ as_env_self : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ as_env_tot  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ pos_env_self: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ pos_env_tot : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ pack_name   : chr  "abind" "abind" "abind" "abind" ...
##  $ fun_name    : chr  "`abind`" "`abind`" "`abind`" "`abind`" ...
##  $ outer       : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ vararg_lazy : logi  TRUE FALSE FALSE FALSE FALSE FALSE ...
##  $ force_lazy  : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ meta_lazy   : logi  FALSE TRUE FALSE FALSE FALSE FALSE ...
##  $ assign_lazy : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ define_lazy : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ remove_lazy : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ error_lazy  : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ lookup_lazy : logi  FALSE TRUE FALSE FALSE FALSE TRUE ...
##  $ effect_lazy : logi  FALSE TRUE FALSE FALSE FALSE TRUE ...
##  $ as_env_lazy : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ pos_env_lazy: logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ ref_lazy    : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...

6 Parameters

## [1] "\\TotalArgumentCount"
## [1] "\\TotalParameterCount"
## /Users/aviral/projects/promisebreaker-experiment/experiment/report/paper/data/package-info.fst
## 'data.frame':    747236 obs. of  6 variables:
##  $ package   : chr  "A3" "A3" "A3" "A3" ...
##  $ path      : chr  "/mnt/nvme0/aviral/promisebreaker-experiment/dependency/library/install/A3" "/mnt/nvme0/aviral/promisebreaker-experiment/dependency/library/install/A3" "/mnt/nvme0/aviral/promisebreaker-experiment/dependency/library/install/A3" "/mnt/nvme0/aviral/promisebreaker-experiment/dependency/library/install/A3" ...
##  $ funname   : chr  "a3" "a3.base" "a3.gen.autocor" "a3.gen.bootstrap" ...
##  $ exported  : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ arity     : int  5 10 2 2 2 2 2 4 4 2 ...
##  $ parameters: chr  "expression(formula = , data = , model.fn = , model.args = list(), ... = )" "expression(formula = , data = , model.fn = , simulate.fn = , n.folds = 10, data.generating.fn = replicate(ncol("| __truncated__ "expression(x = , n.reps = )" "expression(x = , n.reps = )" ...
## [1] "\\ZeroArityFunctionPerc"   "\\OneArityFunctionPerc"   
## [3] "\\TenArityFunctionPerc"    "\\FiftyArityFunctionCount"
## [1] "\\MaxArityFunctionName"       "\\MaxArityFunctionParamCount"

## Restrict the argument-type table to packages in `corpus`, then total
## `argument_count` for every (vararg, missing, arg_type) combination.
corpus_argument_type <- count(
    filter(argument_type, pack_name %in% corpus),
    vararg, missing, arg_type,
    wt = argument_count,
    name = "argument_count"
)

corpus_argument_type %>%
    datatable()

## Collapse `package_info` to one row per (package, funname); `param_count`
## is the number of input rows in each group. Function names are wrapped in
## backticks so they can be matched against backtick-quoted function names.
## NOTE(review): `n()` counts rows, so this is a parameter count only if
## `package_info` holds one row per parameter at this point -- confirm.
package_info <- package_info %>%
    group_by(package, funname) %>%
    summarise(param_count = n()) %>%
    ungroup() %>%
    mutate(funname = paste0("`", funname, "`"))
    
## Attach `param_count` to every corpus function. Functions without a match
## in `package_info` keep an NA `param_count` (left join).
param_dist <- select(
    left_join(
        filter(functions, pack_name %in% corpus),
        package_info,
        by = c(pack_name = "package", fun_name = "funname")
    ),
    pack_name, fun_name, param_count, fun_def
)

## Show the table without the `fun_def` column.
param_dist %>%
    select(-fun_def) %>%
    datatable()

## TODO: an NA param_count does not always mean a zero-parameter function; it only means the function had no match in package_info.


## Treat a missing `param_count` as zero parameters.
param_dist <- mutate(param_dist, param_count = coalesce(param_count, 0L))

## Print one "pkg::fun->count" label per function with more than 50
## parameters. `sprintf()` is vectorised over the columns and yields
## `character(0)` when no function qualifies.
param_dist %>%
    filter(param_count > 50) %>%
    with(sprintf("%s::%s->%s", pack_name, fun_name, param_count)) %>%
    print()

## For each parameter count, list the distinct packages that define a
## function with that many parameters, together with how many there are.
## Summarising over a single grouping variable already returns an ungrouped
## table, so no explicit `ungroup()` is needed.
param_dist %>%
    group_by(param_count) %>%
    summarise(
        package_count = n_distinct(pack_name),
        packages = paste(unique(pack_name), collapse = ",")
    ) %>%
    datatable()

## Number of functions per parameter count, most frequent count first.
## `cumperc` is the running share of all functions, in percent, rounded to
## two decimals, accumulated in that (descending frequency) order.
param_dist <- param_dist %>%
    count(param_count, sort = TRUE, name = "fun_count") %>%
    mutate(
        cumperc = round(100 * cumsum(fun_count) / sum(fun_count), 2)
    )

param_dist %>%
    datatable()

## Total parameters across all functions: each parameter count weighted by
## the number of functions that have it.
cat(
    "Total parameter positions: ",
    sum(param_dist[["param_count"]] * param_dist[["fun_count"]]),
    "\n"
)

## Fold every parameter count above 10 into a single "> 10" bucket, re-total
## the function counts per bucket and add each bucket's share of all
## functions. `param_count` becomes a character column here.
param_dist <- param_dist %>%
    mutate(
        param_count = if_else(
            param_count > 10,
            "> 10",
            as.character(param_count)
        )
    ) %>%
    count(param_count, wt = fun_count, name = "fun_count") %>%
    mutate(relative_fun_count = fun_count / sum(fun_count))

param_dist %>%
    datatable()

## Used by the secondary axis below to turn relative shares back into
## absolute function counts.
total_function_count <- sum(param_dist[["fun_count"]])

## Horizontal bar chart of the share of functions per parameter bucket.
## The primary value axis is in percent; the secondary one shows the
## corresponding absolute counts with SI suffixes. Buckets are ordered
## 0..10 followed by "> 10" through the explicit discrete limits.
## NOTE(review): `label_number_si()` is deprecated in recent `scales`
## releases in favour of `label_number(scale_cut = cut_short_scale())`;
## switch once the minimum supported `scales` version allows -- confirm.
param_dist_plot <-
    ggplot(param_dist, aes(x = param_count, y = relative_fun_count)) +
    geom_col(fill = "black") +
    scale_y_continuous(
        labels = label_percent(),
        sec.axis = sec_axis(
            ~ . * total_function_count,
            labels = label_number_si()
        )
    ) +
    scale_x_discrete(limits = c(as.character(0:10), "> 10")) +
    coord_flip() +
    labs(x = "Parameters", y = "Functions")

save_graph(param_dist_plot, "param_dist")